%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% This file is part of the book
%%
%% Probability and Its Applications
%% http://code.google.com/p/probability-book/
%%
%% Copyright (C) 2010 Minh Van Nguyen <nguyenminh2@gmail.com>
%%
%% See the file COPYING for copying conditions.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\chapter{Chernoff Bounds}
\index{Chernoff bound}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Moment generating functions}

A \emph{generating function}\index{function!generating} for an
infinite sequence $(g_0, g_1, g_2, \dots)$ is an infinite series of
the form
\[
G(z)
=
\sum_{i \geq 0} g_i z^i.
\]
The
\emph{moment generating function}\index{function!moment generating} of
a random variable $X$ is given by
\[
M_X(t)
=
\E\big[e^{tX}\big].
\]
The function $M_X(t)$ captures all the moments of $X$. Note that for
a discrete random variable $X$ taking values $x_i$, the expectation
expands as
\[
M_X(t)
=
\E\big[e^{tX}\big]
=
\sum_i e^{tx_i} \cdot \Pr(X = x_i).
\]

\begin{theorem}
Consider a random variable $X$ with moment generating function
$M_X(t)$. Suppose that it is legitimate to exchange the expectation
and differentiation operators. Then for all integers $n \geq 1$ we have
\[
\E[X^n]
=
M_X^{(n)}(0)
\]
where $M_X^{(n)}(0)$ is the $n$-th derivative of $M_X(t)$ evaluated at
$t = 0$.
\end{theorem}

\begin{proof}
Assume that we can exchange the expectation and differentiation
operators. Then
\[
M_X^{(n)}(t)
=
\E\big[X^n e^{tX}\big]
\]
which when evaluated at $t = 0$ yields $M_X^{(n)}(0) = \E[X^n]$ as
required.
\end{proof}
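
For example, let $X$ be the outcome of a
Bernoulli\index{trial!Bernoulli} trial with success probability
$p$. Then
\[
M_X(t)
=
\E\big[e^{tX}\big]
=
(1 - p) + p e^t
\]
and $M_X^{(n)}(t) = p e^t$ for every integer $n \geq 1$. Evaluating at
$t = 0$ gives $\E[X^n] = p$ for all $n \geq 1$; in particular, the
variance of $X$ is $\E[X^2] - \big(\E[X]\big)^2 = p(1 - p)$.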

\begin{theorem}
\label{thm:chernoff:two_random_var_same_distribution}
Let $X$ and $Y$ be two random variables. If for some $\delta > 0$ we
have
\[
M_X(t)
=
M_Y(t)
\]
for all $t \in (-\delta, \delta)$, then $X$ and $Y$ have the same
distribution.
\end{theorem}

\begin{theorem}
\label{thm:chernoff:sum_product_of_moments}
Let $X$ and $Y$ be independent random variables. Then
\[
M_{X+Y}(t)
=
M_X(t) \cdot M_Y(t).
\]
\end{theorem}

\begin{proof}
If $X$ and $Y$ are independent, then so are $e^{tX}$ and
$e^{tY}$. Hence
%%
\begin{align*}
M_{X+Y}(t)
&=
\E\left[e^{t(X+Y)}\right] \\
&=
\E\left[e^{tX} \cdot e^{tY}\right] \\
&=
\E\left[e^{tX}\right] \cdot \E\left[e^{tY}\right] \\
&=
M_X(t) \cdot M_Y(t)
\end{align*}
%%
in which we used
Theorem~\ref{thm:moments:expectation_is_multiplicative}\index{expectation!multiplicative}.
\end{proof}
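
For example, let $X$ be a Poisson random variable with parameter
$\lambda$, so that $\Pr(X = k) = e^{-\lambda} \lambda^k / k!$ for each
integer $k \geq 0$. Then
\[
M_X(t)
=
\sum_{k \geq 0} e^{tk} \cdot \frac{e^{-\lambda} \lambda^k}{k!}
=
e^{-\lambda} \sum_{k \geq 0} \frac{(\lambda e^t)^k}{k!}
=
e^{\lambda(e^t - 1)}.
\]
If $Y$ is an independent Poisson random variable with parameter
$\nu$, then Theorem~\ref{thm:chernoff:sum_product_of_moments} gives
$M_{X+Y}(t) = e^{(\lambda + \nu)(e^t - 1)}$, and by
Theorem~\ref{thm:chernoff:two_random_var_same_distribution} the sum
$X + Y$ is itself Poisson with parameter $\lambda + \nu$.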


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Deriving Chernoff bounds}
\index{Chernoff bound}

For a random variable $X$ and for any $t > 0$, the map
$x \mapsto e^{tx}$ is increasing, so applying
Markov's\index{Markov's inequality} inequality to the nonnegative
random variable $e^{tX}$ gives
\[
\Pr(X \geq a)
=
\Pr(e^{tX} \geq e^{ta})
\leq
\frac{\E\left[e^{tX}\right]} {e^{ta}}.
\]
In particular, we can choose a value of $t$ that minimizes the latter
bound:
\[
\Pr(X \geq a)
\leq
\min_{t>0} \frac{\E\left[e^{tX}\right]} {e^{ta}}.
\]
Similar inequalities hold for any $t < 0$, since then the map
$x \mapsto e^{tx}$ is decreasing. That is,
\[
\Pr(X \leq a)
=
\Pr(e^{tX} \geq e^{ta})
\leq
\frac{\E\left[e^{tX}\right]} {e^{ta}}
\]
and therefore
\[
\Pr(X \leq a)
\leq
\min_{t<0} \frac{\E\left[e^{tX}\right]}{e^{ta}}.
\]
By choosing suitable values for $t$, we obtain bounds for specific
distributions. The name
\emph{Chernoff bounds}\index{Chernoff bound} collectively refers to
bounds derived via the above approach.
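
For example, if $X$ is a standard normal random variable, then
$M_X(t) = e^{t^2/2}$ and hence for any $a > 0$
\[
\Pr(X \geq a)
\leq
\min_{t>0} \frac{e^{t^2/2}}{e^{ta}}
=
\min_{t>0} e^{t^2/2 - ta}
=
e^{-a^2/2}
\]
where the minimum is attained at $t = a$.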

A \emph{Poisson trial}\index{trial!Poisson} is a
\emph{Bernoulli trial}\index{trial!Bernoulli} whose success
probability may vary from trial to trial. Recall that a
Bernoulli\index{trial!Bernoulli} trial has success probability $p$
and failure probability $1 - p$, and that in a sequence of
Bernoulli\index{trial!Bernoulli} trials each trial has the same
success probability $p$. Now consider a sequence of trials where the
$i$-th trial has success probability $p_i$ and failure probability
$1 - p_i$, so that the success probability is not fixed across the
sequence. We refer to such trials with variable success probabilities
as \emph{Poisson trials}\index{trial!Poisson}.

Let $X_1, \dots, X_n$ be a sequence of $n$ independent
Poisson\index{trial!Poisson} trials with $\Pr(X_i = 1) = p_i$, so
that $\E[X_i] = p_i$. Write $X = \sum_i X_i$ and use the linearity of
expectation to obtain
\[
\mu
=
\E[X]
=
\E\left[\sum_i X_i\right]
=
\sum_i \E[X_i]
=
\sum_i p_i.
\]
The moment generating\index{function!moment generating} function of
each $X_i$ is
%%
\begin{align*}
M_{X_i}(t)
&=
\E\left[e^{tX_i}\right] \\
&=
p_i e^t + (1 - p_i) \\
&=
1 + p_i (e^t - 1) \\
&\leq
\exp\left(p_i (e^t - 1)\right).
\end{align*}
%%
The last inequality uses the fact that $1 + y \leq e^y$ for all
$y \in \R$. Apply
Theorem~\ref{thm:chernoff:sum_product_of_moments} to get
%%
\begin{equation}
\label{eq:chernoff:upper_bound_moment_generating_function}
\begin{split}
M_X(t)
&=
\prod_i M_{X_i}(t) \\
&\leq
\prod_i e^{p_i (e^t - 1)} \\
&=
\exp\left\{\sum_i p_i (e^t - 1)\right\} \\
&=
e^{(e^t - 1)\mu}
\end{split}
\end{equation}
%%
where $\mu = \E[X]$.

We are now ready to derive bounds on the probability that $X$ deviates
from its expectation $\mu$ by $\delta\mu$ or more, for some $\delta > 0$.

\begin{theorem}
\label{thm:chernoff:Chernoff_bounds}
Let $X_1, \dots, X_n$ be independent Poisson\index{trial!Poisson}
trials such that $\Pr(X_i = 1) = p_i$. Write $X = \sum_i X_i$ and let
$\mu = \E[X]$. Then we have the following bounds.
%%
\begin{enumerate}
\item For any $\delta > 0$,
  \[
  \Pr\big(X \geq (1 + \delta)\mu\big)
  \leq
  \left(\frac{e^\delta} {(1 + \delta)^{1 + \delta}}\right)^\mu.
  \]

\item For any $0 < \delta \leq 1$,
  \[
  \Pr\big(X \geq (1 + \delta)\mu\big)
  \leq
  \exp\left\{-\frac{\mu\delta^2} {3}\right\}.
  \]

\item For any $R \geq 6\mu$,
  \[
  \Pr(X \geq R)
  \leq
  2^{-R}.
  \]
\end{enumerate}
\end{theorem}

\begin{proof}
To prove part~1, note that if $t > 0$ then
\[
\Pr\big(X \geq (1 + \delta)\mu\big)
=
\Pr\left(e^{tX} \geq e^{t(1 + \delta)\mu}\right)
\]
where $\exp\big(t(1 + \delta)\mu\big) > 0$. Applying
Markov's\index{Markov's inequality} inequality and the moment generating
function~\eqref{eq:chernoff:upper_bound_moment_generating_function}
yield
%%
\begin{align*}
\Pr\left(e^{tX} \geq e^{t(1 + \delta)\mu}\right)
&\leq
\frac{\E\big[e^{tX}\big]} {e^{t(1 + \delta)\mu}} \\[4pt]
&\leq
\frac{e^{(e^t - 1)\mu}} {e^{t(1 + \delta)\mu}}.
\end{align*}
%%
For any $\delta > 0$, set $t = \ln(1 + \delta) > 0$, so that
$e^t - 1 = \delta$ and
$e^{t(1 + \delta)\mu} = (1 + \delta)^{(1 + \delta)\mu}$, to get
%%
\begin{equation}
\label{eq:chernoff:upper_bound_one_standard_deviation}
\Pr\left(X \geq (1 + \delta)\mu\right)
\leq
\left(\frac{e^\delta} {(1 + \delta)^{1 + \delta}}\right)^\mu.
\end{equation}

To prove part~2, it suffices
by~\eqref{eq:chernoff:upper_bound_one_standard_deviation} to show
that for any $0 < \delta \leq 1$ we have
\[
\frac{e^\delta} {(1 + \delta)^{1 + \delta}}
\leq
e^{-\delta^2 / 3}.
\]
Take the logarithm of both sides to obtain the equivalent inequality
\[
f(\delta)
=
\delta - (1 + \delta) \cdot \ln(1 + \delta) + \frac{\delta^2}{3}
\leq
0.
\]
Use the first and second derivatives of $f(\delta)$ to show that
$f(\delta) \leq 0$ whenever $0 < \delta \leq 1$; the computation
appears below. Conclude that
\[
\Pr\big(X \geq (1 + \delta)\mu\big)
\leq
e^{-\mu\delta^2 / 3}
\]
whenever $0 < \delta \leq 1$.
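
For completeness, the derivative computation runs as follows. We have
\[
f'(\delta)
=
-\ln(1 + \delta) + \frac{2\delta}{3}
\qquad
\text{and}
\qquad
f''(\delta)
=
-\frac{1}{1 + \delta} + \frac{2}{3}.
\]
Thus $f''(\delta) < 0$ on $[0, 1/2)$ and $f''(\delta) > 0$ on
$(1/2, 1]$, so $f'$ first decreases and then increases on
$[0, 1]$. Since $f'(0) = 0$ and $f'(1) = 2/3 - \ln 2 < 0$, we have
$f'(\delta) \leq 0$ throughout $[0, 1]$. Hence $f$ is nonincreasing
on $[0, 1]$, and $f(0) = 0$ forces $f(\delta) \leq 0$ for
$0 < \delta \leq 1$.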

For part~3, we get
\[
\left(\frac{e^\delta} {(1 + \delta)^{1 + \delta}}\right)^\mu
\leq
\left(\frac{e}{1 + \delta}\right)^{\mu(1 + \delta)}
\]
from
%%
\begin{align}
1 \leq e
&\implies
e^\delta \leq e^{1 + \delta} \notag\\[4pt]
&\implies
\left(\frac{e^\delta} {(1 + \delta)^{1 + \delta}}\right)^\mu
\leq
\left(\frac{e^{1 + \delta}} {(1 + \delta)^{1+\delta}}\right)^\mu \notag\\[4pt]
\label{eq:chernoff:Chernoff_bounds:intermediate_1}
&\implies
\left(\frac{e^\delta} {(1 + \delta)^{1 + \delta}}\right)^\mu
\leq
\left(\frac{e} {1 + \delta}\right)^{\mu(1 + \delta)}.
\end{align}
%%
Write $R = \mu(1 + \delta)$. If $R \geq 6\mu$, then $\delta \geq 5$
and hence
%%
\begin{align}
5 \leq \delta
&\implies
\frac{1}{1 + \delta} \leq \frac{1}{6} \notag\\[4pt]
&\implies
\frac{e}{1 + \delta} \leq \frac{e}{6} \notag\\[4pt]
\label{eq:chernoff:Chernoff_bounds:intermediate_2}
&\implies
\left(\frac{e}{1 + \delta}\right)^R \leq \left(\frac{e}{6}\right)^R.
\end{align}
%%
Furthermore, since $e/3 < 1$ and $R > 0$ together give
$(e/3)^R \leq 1$, we have
%%
\begin{align}
\left(\frac{e}{3}\right)^R \leq 1
&\implies
2^R \left(\frac{e}{6}\right)^R \leq 1 \notag\\[4pt]
\label{eq:chernoff:Chernoff_bounds:intermediate_3}
&\implies
\left(\frac{e}{6}\right)^R \leq 2^{-R}.
\end{align}
%%
Combine~\eqref{eq:chernoff:upper_bound_one_standard_deviation}
through~\eqref{eq:chernoff:Chernoff_bounds:intermediate_3} to get
%%
\begin{align*}
\Pr\big(X \geq (1 + \delta)\mu\big)
&\leq
\left(\frac{e^\delta} {(1 + \delta)^{1 + \delta}}\right)^\mu \\[4pt]
&\leq
\left(\frac{e}{1 + \delta}\right)^{(1 + \delta)\mu} \\[4pt]
&\leq
\left(\frac{e}{6}\right)^R \\[4pt]
&\leq
2^{-R}
\end{align*}
%%
where $R = (1 + \delta)\mu$.
\end{proof}
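
As a numerical illustration of part~2, take $\mu = 10$ and
$\delta = 1/2$. Then
\[
\Pr(X \geq 15)
\leq
\exp\left\{-\frac{10 \cdot (1/2)^2}{3}\right\}
=
e^{-5/6}
\approx
0.43.
\]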

\begin{theorem}
\label{thm:chernoff:lower_tail_bounds}
Let $X_1, \dots, X_n$ be independent Poisson\index{trial!Poisson}
trials such that $\Pr(X_i = 1) = p_i$. Write $X = \sum_i X_i$ and let
$\mu = \E[X]$. Then for any $0 < \delta < 1$ we have
%%
\begin{enumerate}
\item $
  \Pr\big(X \leq (1 - \delta)\mu\big)
  \leq
  \left(\displaystyle{
    \frac{e^{-\delta}} {(1 - \delta)^{1 - \delta}}
  }\right)^\mu
$

\item $
  \Pr\big(X \leq (1 - \delta)\mu\big)
  \leq
  e^{-\mu\delta^2 / 2}
$
\end{enumerate}
\end{theorem}

\begin{proof}
The proof is similar to that of Theorem~\ref{thm:chernoff:Chernoff_bounds}.
\end{proof}
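
In outline: for $t < 0$ the map $x \mapsto e^{tx}$ is decreasing, so
Markov's\index{Markov's inequality} inequality together
with~\eqref{eq:chernoff:upper_bound_moment_generating_function} gives
\[
\Pr\big(X \leq (1 - \delta)\mu\big)
=
\Pr\left(e^{tX} \geq e^{t(1 - \delta)\mu}\right)
\leq
\frac{e^{(e^t - 1)\mu}} {e^{t(1 - \delta)\mu}}.
\]
Setting $t = \ln(1 - \delta) < 0$ yields part~1. Part~2 then follows
from part~1 by showing, again via first and second derivatives, that
\[
-\delta - (1 - \delta) \cdot \ln(1 - \delta) + \frac{\delta^2}{2}
\leq
0
\]
for $0 < \delta < 1$.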

\begin{corollary}
Let $X_1, \dots, X_n$ be independent Poisson\index{trial!Poisson}
trials such that $\Pr(X_i = 1) = p_i$. Write $X = \sum_i X_i$ and let
$\mu = \E[X]$. For any $0 < \delta < 1$ we have
\[
\Pr\big(|X - \mu| \geq \delta\mu\big)
\leq
2e^{-\mu\delta^2 / 3}.
\]
\end{corollary}

\begin{proof}
By part~2 of Theorem~\ref{thm:chernoff:Chernoff_bounds} and part~2 of
Theorem~\ref{thm:chernoff:lower_tail_bounds}, together with the fact
that $e^{-\mu\delta^2/2} \leq e^{-\mu\delta^2/3}$, each of the events
$X \geq (1 + \delta)\mu$ and $X \leq (1 - \delta)\mu$ has probability
at most $e^{-\mu\delta^2/3}$. Adding the two bounds gives the claim.
\end{proof}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Bounds for special cases}
\index{Chernoff bound}

\begin{theorem}
\label{thm:chernoff:bound_for_X_i_1_minus_1}
Let $X_1, \dots, X_n$ be independent random variables such that
\[
\Pr(X_i = 1)
=
\Pr(X_i = -1)
=
\frac{1}{2}
\]
and write $X = \sum_i X_i$. For any $a > 0$, we have
\[
\Pr(X \geq a)
\leq
e^{-a^2 / 2n}.
\]
\end{theorem}

\begin{proof}
For all $t \in \R$, the Taylor\index{Taylor!series} series expansions
for $e^t$ and $e^{-t}$ are
\[
e^t
=
\sum_{k=0}^\infty \frac{t^k}{k!}
\qquad
\text{and}
\qquad
e^{-t}
=
\sum_{k=0}^\infty (-1)^k \frac{t^k}{k!}.
\]
Use the above Taylor\index{Taylor!series} series expansions to obtain
%%
\begin{align*}
\E\big[e^{tX_i}\big]
&=
\frac{1}{2}e^t + \frac{1}{2}e^{-t} \\[4pt]
&=
\sum_{k \geq 0} \frac{t^{2k}}{(2k)!} \\[4pt]
&\leq
\sum_{k \geq 0} \frac{(t^2/2)^k}{k!} \\[4pt]
&=
e^{t^2/2}
\end{align*}
%%
where the inequality uses $(2k)! \geq 2^k \, k!$. The moment
generating function of $X$ is
\[
\E\big[e^{tX}\big]
=
\prod_i \E\big[e^{tX_i}\big]
\leq
e^{t^2n / 2}
\]
which together with Markov's\index{Markov's inequality} inequality
yields, for any $t > 0$,
%%
\begin{align*}
\Pr(X \geq a)
&=
\Pr(e^{tX} \geq e^{ta}) \\[4pt]
&\leq
\frac{\E\big[e^{tX}\big]} {e^{ta}} \\[4pt]
&\leq
e^{t^2n/2 - ta}.
\end{align*}
%%
The exponent $t^2 n/2 - ta$ is minimized at $t = a/n > 0$, which
gives $\Pr(X \geq a) \leq e^{-a^2/2n}$.
\end{proof}
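
For instance, with $n = 100$ fair $\pm 1$ summands we obtain
$\Pr(X \geq 20) \leq e^{-20^2/200} = e^{-2} \approx 0.14$.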

By symmetry\index{symmetry}, we also have
\[
\Pr(X \leq -a)
\leq
e^{-a^2 / 2n}
\]
and the following corollary holds.

\begin{corollary}
Let $X_1, \dots, X_n$ be independent random variables such that
\[
\Pr(X_i = 1)
=
\Pr(X_i = -1)
=
\frac{1}{2}
\]
and write $X = \sum_i X_i$. For any $a > 0$, we have
\[
\Pr\big(|X| \geq a\big)
\leq
2e^{-a^2 / 2n}.
\]
\end{corollary}

\begin{corollary}
Let $Y_1, \dots, Y_n$ be independent random variables such that
\[
\Pr(Y_i = 1)
=
\Pr(Y_i = 0)
=
\frac{1}{2}.
\]
Write $Y = \sum_i Y_i$ and let $\mu = \E[Y] = n/2$. Then for any
$a > 0$ and $\delta > 0$, we have
\[
\Pr(Y \geq \mu + a)
\leq
e^{-2a^2 / n}
\]
and
\[
\Pr\big(Y \geq (1 + \delta)\mu\big)
\leq
e^{-\delta^2 \mu}.
\]
\end{corollary}

\begin{proof}
Define $X_i = 2Y_i - 1$ for each $i$, so that $X_1, \dots, X_n$ are
independent and each takes the values $1$ and $-1$ with probability
$1/2$. Write $X = \sum_i X_i$. Then
\[
Y
=
\sum_i Y_i
=
\frac{1}{2} \sum_i X_i + \frac{n}{2}
=
\frac{1}{2} X + \mu
\]
and apply Theorem~\ref{thm:chernoff:bound_for_X_i_1_minus_1} to obtain
\[
\Pr(Y \geq \mu + a)
=
\Pr(X \geq 2a)
\leq
e^{-4a^2 / 2n} = e^{-2a^2 / n}.
\]
Set $a = \delta\mu = \delta n/2$ and again apply
Theorem~\ref{thm:chernoff:bound_for_X_i_1_minus_1} to get
%%
\begin{align*}
\Pr\big(Y \geq (1 + \delta)\mu\big)
&=
\Pr(X \geq 2\delta\mu) \\
&\leq
e^{-2\delta^2\mu^2 / n} \\
&=
e^{-\delta^2\mu}
\end{align*}
%%
as required.
\end{proof}

\begin{corollary}
Let $Y_1, \dots, Y_n$ be independent random variables such that
\[
\Pr(Y_i = 1)
=
\Pr(Y_i = 0)
=
\frac{1}{2}.
\]
Write $Y = \sum_i Y_i$ and let $\mu = \E[Y] = n/2$. Then for any
$0 < a < \mu$ and any $0 < \delta < 1$, we have
\[
\Pr(Y \leq \mu - a)
\leq
e^{-2a^2 / n}
\]
and
\[
\Pr\big(Y \leq (1 - \delta)\mu\big)
\leq
e^{-\delta^2 \mu}.
\]
\end{corollary}
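
\begin{proof}
The random variables $1 - Y_1, \dots, 1 - Y_n$ satisfy the same
hypotheses, and $n - Y = \sum_i (1 - Y_i)$ also has expectation
$\mu = n/2$. Since $Y \leq \mu - a$ precisely when
$n - Y \geq \mu + a$, and $Y \leq (1 - \delta)\mu$ precisely when
$n - Y \geq (1 + \delta)\mu$, both bounds follow from the preceding
corollary applied to $n - Y$, by symmetry\index{symmetry}.
\end{proof}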


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Problems}

\begin{problem}
\item Consider the following problems on independent
  binomial\index{random variable!binomial} random variables.
  %%
  \begin{enumerate}
  \item Determine the moment generating function for the binomial
    random variable $B(n,p)$.

  \item Let $X$ and $Y$ be independent binomial random variables
    $B(n,p)$ and $B(m,p)$, respectively. Use part~1 to obtain the
    moment generating function of $X + Y$.

  \item What can we conclude from the form of the moment generating
    function of $X + Y$?
  \end{enumerate}

\item This chapter implicitly assumes the following extension of the
  Chernoff\index{Chernoff bound} bound. Let $X_1, \dots, X_n$ be
  independent $0$-$1$ random variables. Write $X = \sum_i X_i$ and
  $\mu = \E[X]$. Let $\mu_L$ and $\mu_H$ be such that
  $\mu_L \leq \mu \leq \mu_H$. For any $\delta > 0$, show that
  \[
  \Pr\big(X \geq (1 + \delta)\mu_H\big)
  \leq
  \left(\frac{e^\delta} {(1 + \delta)^{1 + \delta}}\right)^{\mu_H}.
  \]
  If $0 < \delta < 1$, show that
  \[
  \Pr\big(X \leq (1 - \delta)\mu_L\big)
  \leq
  \left(\frac{e^{-\delta}} {(1 - \delta)^{1 - \delta}}\right)^{\mu_L}.
  \]

\item Let $X_1, \dots, X_n$ be independent integers chosen
  uniformly\index{random!uniform} at random from $\{0, 1, 2\}$. If
  $X = \sum_i X_i$ and $0 < \delta < 1$, derive a Chernoff bound for
  $\Pr\big(X \geq (1 + \delta)n\big)$ and
  $\Pr\big(X \leq (1 - \delta)n\big)$.

\item Consider a collection $X_1, \dots, X_n$ of independent
  Poisson\index{trial!Poisson} trials such that $\Pr(X_i = 1) =
  p$. Write $X = \sum_i X_i$ to obtain $\E[X] = pn$. Let
  \[
  F(x,p)
  =
  x \cdot \ln(x/p) + (1 - x) \cdot \ln\big((1 - x) / (1 - p)\big).
  \]
  %%
  \begin{enumerate}
  \item If $p < x \leq 1$, show that
    \[
    \Pr(X \geq xn)
    \leq
    e^{-n F(x,p)}.
    \]

  \item If $0 < x < 1$ and $0 < p < 1$, show that $F(x,p) - 2(x - p)^2 \geq 0$.
    \emph{Hint:} take the second derivative\index{derivative} of
    $F(x,p) - 2(x - p)^2$ with respect to $x$.

  \item Use parts~1 and~2 to show that
    \[
    \Pr\big(X \geq (p + \varepsilon)n\big)
    \leq
    e^{-2n\varepsilon^2}.
    \]

  \item Use symmetry\index{symmetry} to show that
    \[
    \Pr\big(X \leq (p - \varepsilon)n\big)
    \leq
    e^{-2n\varepsilon^2}
    \]
    and conclude that
    \[
    \Pr\big(|X - pn| \geq \varepsilon n\big)
    \leq
    2e^{-2n\varepsilon^2}.
    \]
  \end{enumerate}
\end{problem}
